source("./utils/tianfengRwrappers.R")
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
Please use the `linewidth` argument instead.
 
#获取平均表达矩阵
aver_expr_mat <- get_averexpr_mat_cluster(ctrl_filtered)
bulk_expr_mat <- read.csv("./data_tables/DEGs_from_smartSeq/nomalized_expr_tr4Tr5Tr5_2.csv",row.names = 1)
bulk_expr_mat <- bulk_expr_mat[,!(colnames(bulk_expr_mat)%in%"Tr5_2")]
colnames(aver_expr_mat) <- levels(ctrl_filtered@active.ident)
#选高差异表达
# diff_genes <- intersect(ctrl_filtered@assays[["SCT"]]@var.features, rownames(bulk_expr_mat))
diff_genes <- intersect(rownames(aver_expr_mat), rownames(bulk_expr_mat))
aver_expr_mat <- aver_expr_mat[diff_genes,]
bulk_expr_mat <- bulk_expr_mat[diff_genes,]
# normalized_counts <-  normalized_counts[diff_genes,]

cormat <- cor(aver_expr_mat,bulk_expr_mat, method = "spearman")
pheatmap(cormat, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), border_color = NA, cluster_rows = T, cluster_cols = T, angle_col = 0, main = "Correlation Heatmap")

ASP bulk RNAseq

#smartseq 交叉验证 PC in scRNA-seq

degs <- read.csv("./data_tables/DEGs_from_smartSeq/up_DEGslist.csv",header = FALSE)
ctrl_markers <- read.csv("./data_tables/ctrl_markers_fig1.csv", row.names = 1)
dhm(intersect(degs$V1,rownames(ctrl_markers)), ctrl_filtered)

#ggsave(filename = paste0("highexpr_tr5",".png"), device = png, height = 8, width = 12, plot = highexprheatmap)
# levels(Idents(ctrl_filtered))[levels(Idents(ctrl_filtered)) %in% c("TC", "ASP")] <- "ASP+TC" # 合并ASP和TC
aver_expr_mat <- get_averexpr_mat_cluster(ctrl_filtered)

#从asp vs tr45数据集
highexp_genes <- read.csv("./data_tables/DEGs_from_smartSeq/Pro vs Asp.csv", header = T, row.names = 1)
highexp_genes[is.na(highexp_genes)] <-  0
PCgenes <- highexp_genes[highexp_genes$baseMean>1000 &
                            highexp_genes$log2FoldChange>0.5,]
ASPgenes <- highexp_genes[highexp_genes$baseMean>1000 &
                            highexp_genes$log2FoldChange<(-0.5),]
#ASP,log2FoldChange<(-0.5)    Tr45,log2FoldChange>0.5 baseMean>1000

PCgenes <- intersect(rownames(PCgenes),rownames(aver_expr_mat))
Tr45highexpr <- pheatmap(aver_expr_mat[PCgenes,], cluster_cols=F, scale = "row",cluster_rows = T, show_rownames = F, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "Tr45highexpr Heatmap")


dd <- caret::preProcess(t(aver_expr_mat[PCgenes,])) %>%
  predict(t(aver_expr_mat[PCgenes,])) %>% t()
          
pheatmap(dd, cluster_cols=F, scale = "none", cluster_rows = T, show_rownames = F, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "Tr45highexpr Heatmap")


data = colSums(dd>1)

ggplot(data.frame(num = data, cluster = names(data))) + geom_bar(aes(cluster, fill = cluster, weight = num)) + bartheme


# ggsave(filename = paste0("ASP_tr45highexpr Heatmap",".png"), device = png, height = 8, width = 12, plot = Tr45highexpr)
df1 = data.frame(list(colSums(dd>1))) %>% lapply(., function(vec){(vec-min(vec))/(max(vec)-min(vec))}) %>% data.frame()
colnames(df1) <- list('high expr counts')
df1$label <- colnames(dd)

df2 = data.frame(list(colSums(dd))) %>% lapply(., function(vec){(vec-min(vec))/(max(vec)-min(vec))}) %>% data.frame()
colnames(df2) <- list('average expression')
df2$label <- colnames(dd)
library(ggnewscale)
names(data)
[1] "ASP" "DB"  "DT"  "SB"  "PC"  "TC"  "VB"  "LT"  "GB" 
ggplot(data = reshape2::melt(position))+
  geom_line(aes(x=variable,y=factor(value),group=index, size=rep(reshape2::melt(df)$value,2)^2,color=..size..), alpha=1)+ scale_size(range = c(1,5)) + scale_color_gradient(low = 'lightgrey', high = "#1E90FF") + new_scale_color() +
  geom_line(aes(x=variable,y=factor(value),group=index, size=rep(reshape2::melt(df2)$value,2)^2,color=..size..), alpha=0.5)+ scale_size(range = c(1,5)) + scale_color_gradient(low = 'lightgrey', high = "#FF2121") + new_scale_color() +
  geom_point(mapping = aes(x=variable,y=factor(value),color=cell_type),size=7)+ scale_y_discrete(labels=names(data), position = "right") + scale_x_discrete(expand = c(0.05,0.05))+ scale_color_manual(values=colors_list) + theme_void() +theme(axis.text.y.right = element_text())
Using cell_type, index as id variables
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

draw dendrogram for PC clusters

ggplot(data = plotdata) +
  geom_segment(aes(
    x = -2,
    y = 4,
    xend = y,
    yend = x,
    size = `average expression`^2,
    color = ..size..
  ),
  alpha = 0.8) + scale_size(range = c(1, 5)) + scale_color_gradient(low = '#ffffff', high = "#fd9999") + new_scale_color() +
  geom_segment(aes(
    x = -2,
    y = 4,
    xend = y,
    yend = x,
    size = `high expr counts`^2,
    color = ..size..
  ),
  alpha = 0.6) + scale_size(range = c(1, 5)) + scale_color_gradient(low = '#ffffff', high = "#b1d6fb") + new_scale_color() +
  geom_point(mapping = aes(x = y,
                           y = x,
                           color = label),
             size = 7) + geom_text(aes(x = y, y = x, label = label), size = 4) + geom_point(aes(x =
                                                                                                  -2, y = 4), color = 'lightgrey', size = 7) +
  scale_y_discrete(labels = plotdata$label, position = "right") + scale_x_discrete(expand = c(0.05, 0.05)) +
  scale_color_manual(values = color_mapping[plotdata$label]) +
  geom_segment(aes(
    x = y * 2 + 0.1,
    y = x,
    xend = yend * 2 + 0.1,
    yend = xend
  ),
  segment(ddata)) +
  theme_void() + theme(axis.text.y.right = element_text())
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

additional heatmap

pheatmap(corrmat, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), border_color = NA, cluster_rows = T, cluster_cols = T, angle_col = 0, main = "Correlation Heatmap")

baseline Tr45

baseline ASP

TC

```r
aver_expr_mat <- get_averexpr_mat_cluster(TC)
colnames(aver_expr_mat) <- levels(TC@active.ident)

#从asp vs tr45数据集
all_genes <- read.csv(\./DEGs_from_smartSeq/Pro vs Asp.csv\, header = T,row.names = 1)
all_genes[is.na(all_genes)] <-  0
ASPgenes <- all_genes[all_genes$baseMean>100 & all_genes$padj<0.01 & all_genes$log2FoldChange<(-0.5),] #ASP,log2FoldChange<(-0.5)    Tr45,log2FoldChange>0.5 baseMean>1000
ASPgenes <- rownames(ASPgenes)
ASPgenes <- intersect(ASPgenes,rownames(TCsubMarkers))

Tr45highexpr <- pheatmap(aver_expr_mat[ASPgenes,], cluster_cols=F, scale = \row\,cluster_rows = T, show_rownames = F, color = colorRampPalette(c(\white\, \#ff2121\))(400), angle_col = 45, border_color = NA, main = \ASP_Tr45highexpr Heatmap\)

<!-- rnb-source-end -->

<!-- rnb-chunk-end -->


<!-- rnb-text-begin -->



# 比较SMARTseq和scRNA-seq marker基因的重合个数
## ASP

<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


```{=html}

<!-- rnb-htmlwidget-begin eyJkZXBlbmRlbmNpZXMiOlt7Im5hbWUiOiJodG1sd2lkZ2V0cyIsInZlcnNpb24iOiIxLjUuNCIsInNyYyI6eyJmaWxlIjoid3d3In0sIm1ldGEiOltdLCJzY3JpcHQiOiJodG1sd2lkZ2V0cy5qcyIsInN0eWxlc2hlZXQiOltdLCJoZWFkIjpbXSwiYXR0YWNobWVudCI6W10sInBhY2thZ2UiOiJodG1sd2lkZ2V0cyIsImFsbF9maWxlcyI6dHJ1ZX0seyJuYW1lIjoicGxvdGx5LWJpbmRpbmciLCJ2ZXJzaW9uIjoiNC4xMC4wIiwic3JjIjp7ImZpbGUiOiJodG1sd2lkZ2V0cyJ9LCJtZXRhIjpbXSwic2NyaXB0IjoicGxvdGx5LmpzIiwic3R5bGVzaGVldCI6W10sImhlYWQiOltdLCJhdHRhY2htZW50IjpbXSwicGFja2FnZSI6InBsb3RseSIsImFsbF9maWxlcyI6ZmFsc2V9LHsibmFtZSI6InR5cGVkYXJyYXkiLCJ2ZXJzaW9uIjoiMC4xIiwic3JjIjp7ImZpbGUiOiJodG1sd2lkZ2V0cy9saWIvdHlwZWRhcnJheSJ9LCJtZXRhIjpbXSwic2NyaXB0IjoidHlwZWRhcnJheS5taW4uanMiLCJzdHlsZXNoZWV0IjpbXSwiaGVhZCI6W10sImF0dGFjaG1lbnQiOltdLCJwYWNrYWdlIjoicGxvdGx5IiwiYWxsX2ZpbGVzIjpmYWxzZX0seyJuYW1lIjoianF1ZXJ5IiwidmVyc2lvbiI6IjMuNS4xIiwic3JjIjp7ImZpbGUiOiJsaWIvanF1ZXJ5In0sIm1ldGEiOltdLCJzY3JpcHQiOiJqcXVlcnkubWluLmpzIiwic3R5bGVzaGVldCI6W10sImhlYWQiOltdLCJhdHRhY2htZW50IjpbXSwicGFja2FnZSI6ImNyb3NzdGFsayIsImFsbF9maWxlcyI6dHJ1ZX0seyJuYW1lIjoiY3Jvc3N0YWxrIiwidmVyc2lvbiI6IjEuMS4xIiwic3JjIjp7ImZpbGUiOiJ3d3cifSwibWV0YSI6W10sInNjcmlwdCI6ImpzL2Nyb3NzdGFsay5taW4uanMiLCJzdHlsZXNoZWV0IjoiY3NzL2Nyb3NzdGFsay5jc3MiLCJoZWFkIjpbXSwiYXR0YWNobWVudCI6W10sInBhY2thZ2UiOiJjcm9zc3RhbGsiLCJhbGxfZmlsZXMiOnRydWV9LHsibmFtZSI6InBsb3RseS1odG1sd2lkZ2V0cy1jc3MiLCJ2ZXJzaW9uIjoiMi41LjEiLCJzcmMiOnsiZmlsZSI6Imh0bWx3aWRnZXRzL2xpYi9wbG90bHlqcyJ9LCJtZXRhIjpbXSwic2NyaXB0IjpbXSwic3R5bGVzaGVldCI6InBsb3RseS1odG1sd2lkZ2V0cy5jc3MiLCJoZWFkIjpbXSwiYXR0YWNobWVudCI6W10sInBhY2thZ2UiOiJwbG90bHkiLCJhbGxfZmlsZXMiOmZhbHNlfSx7Im5hbWUiOiJwbG90bHktbWFpbiIsInZlcnNpb24iOiIyLjUuMSIsInNyYyI6eyJmaWxlIjoiaHRtbHdpZGdldHMvbGliL3Bsb3RseWpzIn0sIm1ldGEiOltdLCJzY3JpcHQiOiJwbG90bHktbGF0ZXN0Lm1pbi5qcyIsInN0eWxlc2hlZXQiOltdLCJoZWFkIjpbXSwiYXR0YWNobWVudCI6W10sInBhY2thZ2UiOiJwbG90bHkiLCJhbGxfZmlsZXMiOmZhbHNlfV0sIm1ldGFkYXRhIjp7ImNsYXNzZXMiOlsicGxvdGx5IiwiaHRtbHdpZGdldCJdLCJzaXppbmdQb2xpY3kiOnsiZGVmYXVsdFdpZHRoIjpbIjEwMCUiXSwiZGVmYXVsdEhlaWdodCI6WzQwMF0sInBhZGRpbmciOlswXSwidmlld2VyIjp7ImRlZmF1bHRXaWR0aCI6W10sImRlZmF1bHRIZWlnaHQiOltdLCJwYWRkaW5nIjpbXSwiZmlsbCI6W3RydWVdLCJzdXBwcmVzcyI6W2ZhbHNlXSwicGFuZUhlaWdodCI6W119LCJicm93c2VyIjp7ImRlZmF1bHRXaWR0aCI6W10sImRlZmF1bHRIZWlnaHQiOltdLCJwYWRkaW5nIjpbXSwiZmlsbCI6W3RydWVdLCJleHRlcm5hbCI6W2ZhbHNlXX0sImtuaXRyIjp7ImRlZmF1bHRXaWR0aCI6W10sImRlZmF1bHRIZWlnaHQiOltdLCJmaWd1cmUiOlt0cnVlXX0sInZpZXdlci5wYWRkaW5nIjpbMF0sInZpZXdlci5maWxsIjpbdHJ1ZV19fX0= -->

<!-- htmlwidget-container-begin -->
<div id="htmlwidget-e4450e7f18c39ad5e98d" style="width:504px;height:504px;" class="plotly html-widget"></div>
<!-- htmlwidget-container-end -->
<script type="application/json" data-for="htmlwidget-e4450e7f18c39ad5e98d">{"x":{"visdat":{"460d769c0a06":["function () ","plotlyVisDat"]},"cur_data":"460d769c0a06","attrs":{"460d769c0a06":{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[7,6,5,25,10,16,24,57,19],"name":"ASP_normalized","alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"bar"},"460d769c0a06.1":{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[7,8,4,23,16,13,28,78,22],"name":"notchctrl","alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"bar","inherit":true},"460d769c0a06.2":{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[4,6,5,19,12,16,30,76,20],"name":"ASP_TPM","alpha_stroke":1,"sizes":[10,100],"spans":[1,20],"type":"bar","inherit":true}},"layout":{"margin":{"b":40,"l":60,"t":25,"r":10},"title":"ASPtop4000","xaxis":{"domain":[0,1],"automargin":true,"title":[],"type":"category","categoryorder":"array","categoryarray":["ASP","DB","DT","GB","LT","PC","SB","TC","VB"]},"yaxis":{"domain":[0,1],"automargin":true,"title":[]},"hovermode":"closest","showlegend":true},"source":"A","config":{"modeBarButtonsToAdd":["hoverclosest","hovercompare"],"showSendToCloud":false},"data":[{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[7,6,5,25,10,16,24,57,19],"name":"ASP_normalized","type":"bar","marker":{"color":"rgba(31,119,180,1)","line":{"color":"rgba(31,119,180,1)"}},"error_y":{"color":"rgba(31,119,180,1)"},"error_x":{"color":"rgba(31,119,180,1)"},"xaxis":"x","yaxis":"y","frame":null},{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[7,8,4,23,16,13,28,78,22],"name":"notchctrl","type":"bar","marker":{"color":"rgba(255,127,14,1)","line":{"color":"rgba(255,127,14,1)"}},"error_y":{"color":"rgba(255,127,14,1)"},"error_x":{"color":"rgba(255,127,14,1)"},"xaxis":"x","yaxis":"y","frame":null},{"x":["DB","DT","SB","PC","TC","VB","LT","GB","ASP"],"y":[4,6,5,19,12,16,30,76,20],"name":"ASP_TPM","type":"bar","marker":{"color":"rgba(44,160,44,1)","line":{"color":"rgba(44,160,44,1)"}},"error_y":{"color":"rgba(44,160,44,1)"},"error_x":{"color":"rgba(44,160,44,1)"},"xaxis":"x","yaxis":"y","frame":null}],"highlight":{"on":"plotly_click","persistent":false,"dynamic":false,"selectize":false,"opacityDim":0.2,"selected":{"opacity":1},"debounce":0},"shinyEvents":["plotly_hover","plotly_click","plotly_selected","plotly_relayout","plotly_brushed","plotly_brushing","plotly_clickannotation","plotly_doubleclick","plotly_deselect","plotly_afterplot","plotly_sunburstclick"],"base_url":"https://plot.ly"},"evals":[],"jsHooks":[]}</script>
<!-- htmlwidget-sizing-policy-base64 PHNjcmlwdCB0eXBlPSJhcHBsaWNhdGlvbi9odG1sd2lkZ2V0LXNpemluZyIgZGF0YS1mb3I9Imh0bWx3aWRnZXQtZTQ0NTBlN2YxOGMzOWFkNWU5OGQiPnsidmlld2VyIjp7IndpZHRoIjoiMTAwJSIsImhlaWdodCI6NDAwLCJwYWRkaW5nIjowLCJmaWxsIjp0cnVlfSwiYnJvd3NlciI6eyJ3aWR0aCI6IjEwMCUiLCJoZWlnaHQiOjQwMCwicGFkZGluZyI6MCwiZmlsbCI6dHJ1ZX19PC9zY3JpcHQ+ -->

<!-- rnb-htmlwidget-end -->

PC

refdata2 <- read.csv("./data_tables/DEGs_from_smartSeq/nomalized_expr_tr4Tr5Tr5_2.csv",row.names = 1)
cluster_info <- Idents(ctrl_filtered) %>% levels()
# refdata2 <- read.csv("~/single cell-atlas of fly trachea/ASP_bulkRNAseq/Tr45_normalized_counts.csv",row.names = 1)
refdata2 <- as.matrix(rowMeans(refdata2)) 
refdata2 <- as.matrix(as.matrix(refdata2[order(refdata2,decreasing = T),])[1:800,]) 
geneset2 <- intersect(VariableFeatures(ctrl_filtered), rownames(refdata2)) 
inter <- lapply(cluster_info, func, geneset2)

plot_ly(
  x = cluster_info,
  y = as.numeric(inter),
  name = "asptrdataset", type = "bar") %>% layout(title = "PCtop800")

pheatmap(aver_expr_mat[geneset2,], cluster_cols=F, scale = "none",cluster_rows = T, show_rownames = F, color = colorRampPalette(c( "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "PCtop800 Heatmap")
library(UpSetR)
func22 <- function(i, feature){
  intersect(ctrl_markers[ctrl_markers$cluster == i,]$gene, feature)
}

data =  lapply(cluster_info, func22, geneset2)
names(data) <- cluster_info
upset(fromList(data),nsets = length(data), sets = cluster_info)
---
title: "R Notebook"
output: html_notebook
---
```{r setup, include=FALSE}
getwd()
knitr::opts_knit$set(root.dir = "/home/zju/tianfeng/single_cell_atlas")
# setwd("/home/zju/tianfeng/single_cell_atlas")
# getwd()
```

```{r}
source("./utils/tianfengRwrappers.R")
 
#获取平均表达矩阵
aver_expr_mat <- get_averexpr_mat_cluster(ctrl_filtered)
bulk_expr_mat <- read.csv("./data_tables/DEGs_from_smartSeq/nomalized_expr_tr4Tr5Tr5_2.csv",row.names = 1)
bulk_expr_mat <- bulk_expr_mat[,!(colnames(bulk_expr_mat)%in%"Tr5_2")]
colnames(aver_expr_mat) <- levels(ctrl_filtered@active.ident)
#选高差异表达
# diff_genes <- intersect(ctrl_filtered@assays[["SCT"]]@var.features, rownames(bulk_expr_mat))
diff_genes <- intersect(rownames(aver_expr_mat), rownames(bulk_expr_mat))
aver_expr_mat <- aver_expr_mat[diff_genes,]
bulk_expr_mat <- bulk_expr_mat[diff_genes,]
# normalized_counts <-  normalized_counts[diff_genes,]

cormat <- cor(aver_expr_mat,bulk_expr_mat, method = "spearman")
pheatmap(cormat, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), border_color = NA, cluster_rows = T, cluster_cols = T, angle_col = 0, main = "Correlation Heatmap")

```
# ASP bulk RNAseq
```{r}
aver_expr_mat <- get_averexpr_mat_cluster(ctrl_filtered)
ASP_bulk_exp_mat1 <- read.csv("data_tables/DEGs_from_smartSeq/ASP_normalized_counts/asp_normalized_counts.csv",row.names = 1)  %>% as.matrix()
ASP_bulk_exp_mat2 <- read.csv("data_tables/DEGs_from_smartSeq/ASP_normalized_counts/ASP_TPM.csv",row.names = 1)  %>% as.matrix()
ASP_bulk_exp_mat3 <- read.csv("data_tables/DEGs_from_smartSeq/ASP_normalized_counts/notchdataset_normalized_counts.csv",row.names = 1)  %>% as.matrix()

diff_genes <- Reduce(intersect, list(ctrl_filtered@assays[["SCT"]]@var.features,
                        rownames(ASP_bulk_exp_mat1),rownames(ASP_bulk_exp_mat2),
                        rownames(ASP_bulk_exp_mat3)))

aver_expr_mat <- aver_expr_mat[diff_genes,]
ASP_bulk_expr_mat <- cbind(ASP_bulk_exp_mat1[diff_genes,],ASP_bulk_exp_mat2[diff_genes,],
                       ASP_bulk_exp_mat3[diff_genes,])
colnames(ASP_bulk_expr_mat) <- c("ASP1","ASP2","ASP3")
cormat <- cor(aver_expr_mat,ASP_bulk_expr_mat, method = "spearman")
pheatmap(cormat, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), border_color = NA, cluster_rows = T, cluster_cols = T, angle_col = 0, main = "Correlation Heatmap")
```





#smartseq 交叉验证 PC in scRNA-seq
```{r}
degs <- read.csv("./data_tables/DEGs_from_smartSeq/up_DEGslist.csv",header = FALSE)
ctrl_markers <- read.csv("./data_tables/ctrl_markers_fig1.csv", row.names = 1)
dhm(intersect(degs$V1,rownames(ctrl_markers)), ctrl_filtered)

#ggsave(filename = paste0("highexpr_tr5",".png"), device = png, height = 8, width = 12, plot = highexprheatmap)
```


```{r}
# levels(Idents(ctrl_filtered))[levels(Idents(ctrl_filtered)) %in% c("TC", "ASP")] <- "ASP+TC" # 合并ASP和TC
aver_expr_mat <- get_averexpr_mat_cluster(ctrl_filtered)

#从asp vs tr45数据集
highexp_genes <- read.csv("./data_tables/DEGs_from_smartSeq/Pro vs Asp.csv", header = T, row.names = 1)
highexp_genes[is.na(highexp_genes)] <-  0
PCgenes <- highexp_genes[highexp_genes$baseMean>1000 &
                            highexp_genes$log2FoldChange>0.5,]
ASPgenes <- highexp_genes[highexp_genes$baseMean>1000 &
                            highexp_genes$log2FoldChange<(-0.5),]
#ASP,log2FoldChange<(-0.5)    Tr45,log2FoldChange>0.5 baseMean>1000

PCgenes <- intersect(rownames(PCgenes),rownames(aver_expr_mat))
Tr45highexpr <- pheatmap(aver_expr_mat[PCgenes,], cluster_cols=F, scale = "row",cluster_rows = T, show_rownames = F, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "Tr45highexpr Heatmap")

dd <- caret::preProcess(t(aver_expr_mat[PCgenes,])) %>%
  predict(t(aver_expr_mat[PCgenes,])) %>% t()
          
pheatmap(dd, cluster_cols=F, scale = "none", cluster_rows = T, show_rownames = F, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "Tr45highexpr Heatmap")

data = colSums(dd>1)

ggplot(data.frame(num = data, cluster = names(data))) + geom_bar(aes(cluster, fill = cluster, weight = num)) + bartheme

# ggsave(filename = paste0("ASP_tr45highexpr Heatmap",".png"), device = png, height = 8, width = 12, plot = Tr45highexpr)
```

```{r}
df1 = data.frame(list(colSums(dd>1))) %>% lapply(., function(vec){(vec-min(vec))/(max(vec)-min(vec))}) %>% data.frame()
colnames(df1) <- list('high expr counts')
df1$label <- colnames(dd)

df2 = data.frame(list(colSums(dd))) %>% lapply(., function(vec){(vec-min(vec))/(max(vec)-min(vec))}) %>% data.frame()
colnames(df2) <- list('average expression')
df2$label <- colnames(dd)
```

```{r}
library(ggnewscale)
position <- data.frame(rep(4,9), seq(0,8,length.out=9))
position$cell_type <- rep(names(data),1)
position$index <- rownames(position)

colnames(position) <- list('left','right','cell_type','index')
reshape2::melt(position)
position
ggplot(data = reshape2::melt(position))+
  geom_line(aes(x=variable,y=factor(value),group=index, size=rep(reshape2::melt(df)$value,2)^2,color=..size..), alpha=1)+ scale_size(range = c(1,5)) + scale_color_gradient(low = 'lightgrey', high = "#1E90FF") + new_scale_color() +
  geom_line(aes(x=variable,y=factor(value),group=index, size=rep(reshape2::melt(df2)$value,2)^2,color=..size..), alpha=0.5)+ scale_size(range = c(1,5)) + scale_color_gradient(low = 'lightgrey', high = "#FF2121") + new_scale_color() +
  geom_point(mapping = aes(x=variable,y=factor(value),color=cell_type),size=7)+ scale_y_discrete(labels=names(data), position = "right") + scale_x_discrete(expand = c(0.05,0.05))+ scale_color_manual(values=colors_list) + theme_void() +theme(axis.text.y.right = element_text())
```

### draw dendrogram for PC clusters
```{r}
library(ggdendro)
corrmat <- cor(aver_expr_mat, method = "spearman")
distmat <- 1 - corrmat
distmat %<>% as.dist() # transform to dist object
ddata <- hclust(distmat) %>% as.dendrogram() %>% dendro_data(type = "rectangle")
# ggdendrogram(hclust(distmat), rotate = FALSE, size = 2)
ggplot(segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  coord_flip() + 
  scale_y_continuous(expand = c(0.2, 0))

color_mapping <- colors_list[1:9]
names(color_mapping) <- ctrl_filtered$celltype %>% levels()

position <- ddata[["labels"]]
position$left <- 4
plotdata <- Reduce(function(x, y){inner_join(x, y, by='label')}, list(position, df1, df2))

ggplot(data = plotdata) +
  geom_segment(aes(
    x = -2,
    y = 4,
    xend = y,
    yend = x,
    size = `average expression`^2,
    color = ..size..
  ),
  alpha = 0.8) + scale_size(range = c(1, 5)) + scale_color_gradient(low = '#ffffff', high = "#fd9999") + new_scale_color() +
  geom_segment(aes(
    x = -2,
    y = 4,
    xend = y,
    yend = x,
    size = `high expr counts`^2,
    color = ..size..
  ),
  alpha = 0.6) + scale_size(range = c(1, 5)) + scale_color_gradient(low = '#ffffff', high = "#b1d6fb") + new_scale_color() +
  geom_point(mapping = aes(x = y,
                           y = x,
                           color = label),
             size = 7) + geom_text(aes(x = y, y = x, label = label), size = 4) + geom_point(aes(x =
                                                                                                  -2, y = 4), color = 'lightgrey', size = 7) +
  scale_y_discrete(labels = plotdata$label, position = "right") + scale_x_discrete(expand = c(0.05, 0.05)) +
  scale_color_manual(values = color_mapping[plotdata$label]) +
  geom_segment(aes(
    x = y * 2 + 0.1,
    y = x,
    xend = yend * 2 + 0.1,
    yend = xend
  ),
  segment(ddata)) +
  theme_void() + theme(axis.text.y.right = element_text())




```
### additional heatmap
```{r fig.width=4, fig.height=3}
pheatmap(corrmat, color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400), border_color = NA, cluster_rows = T, cluster_cols = T, angle_col = 0, main = "Correlation Heatmap")
```


# baseline Tr45
```{r fig.height=4, fig.width=6}
highexp_genes <- read.csv("./data_tables/DEGs_from_smartSeq/Tr4 vs Tr5.csv", header = T,row.names = 1)
highexp_genes[is.na(highexp_genes)] <-  0 
highexp_genes <- highexp_genes[highexp_genes$baseMean>2000,] # Tr45,log2FoldChange>0.5 baseMean>1000对应193, 2000对应120,3000对应88，5000对应58,6000对应51，10000对于34
highexp_genes <- rownames(highexp_genes)
length(intersect(highexp_genes,rownames(aver_expr_mat)))
Tr45highexpr2 <- pheatmap(aver_expr_mat[intersect(highexp_genes,rownames(aver_expr_mat)),], 
                          breaks = unique(c(seq(0,4, length=400))),
                          cluster_cols=F, scale = "none",cluster_rows = T, show_rownames = F, 
                          color = colorRampPalette(c("white", "#ff2121"))(400), 
                          angle_col = 45, border_color = NA, main = "baseline_tr45highexpr Heatmap")
#ggsave(filename = paste0("baseline_tr45highexpr Heatmap",".png"), device = png, height = 8, width = 12, plot = Tr45highexpr2)
```

# baseline ASP
```{r fig.height=4, fig.width=6}
highexp_genes <- read.csv("./data_tables/DEGs_from_smartSeq/Pro vs Asp.csv", header = T,row.names = 1)
highexp_genes[is.na(highexp_genes)] <-  0
highexp_genes <- highexp_genes[highexp_genes$baseMean>1000,]
highexp_genes <- rownames(highexp_genes)
length(intersect(highexp_genes,rownames(aver_expr_mat)))
pheatmap(aver_expr_mat[intersect(highexp_genes,rownames(aver_expr_mat)),], 
                          breaks = unique(c(seq(0,4, length=400))),
                          cluster_cols=F, scale = "none",cluster_rows = T, show_rownames = F, 
                          color = colorRampPalette(c("white", "#ff2121"))(400), 
                          angle_col = 45, border_color = NA, main = "baseline_Asphighexpr Heatmap")
#ggsave(filename = paste0("baseline_tr45highexpr Heatmap",".png"), device = png, height = 8, width = 12, plot = Tr45highexpr2)
```


## TC
```{r}
ctrl_filtered <- FindNeighbors(ctrl_filtered)
# DimPlot(larva,label = T, cells.highlight = WhichCells(larva_wt,ident= "DT")) + xlim(-15,15) + ylim(-15,15)

larva <- FindSubCluster(ctrl_filtered, "ASP+TC","SCT_snn",resolution = 0.3)
umapplot(larva, group.by = "sub.cluster")

TC <- subset(larva,idents = "ASP+TC")
Idents(TC) <- TC$sub.cluster
umapplot(TC, group.by = "sub.cluster") + scale_y_continuous(limits = c(-2,8), breaks = NULL) +  scale_x_continuous(limits = c(-8,2), breaks = NULL)

#子群的marker
TCsubMarkers <- FindAllMarkers(TC, min.pct = 0.2, logfc.threshold = 0.5, only.pos = F)
TC_3markers <- FindMarkers(TC, ident.1 = "ASP+TC_2", only.pos = F, logfc.threshold = 0.5)

dhm(rownames(TCsubMarkers),seuobj = TC) 


TCavermat <- get_averexpr_mat_cluster(TC)
refdata <- read.csv("./data_tables/DEGs_from_smartSeq/ASP_normalized_counts/asp_normalized_counts.csv",row.names = 1)
refdata <- as.matrix(rowMeans(refdata)) #按行取平均
refdata <- as.matrix(as.matrix(refdata[order(refdata,decreasing = T),])[1:1000,]) #bulk RNA-seq的前1000个最高表达的基因
# geneset <- intersect(rownames(TCavermat), rownames(refdata))
refdata <- as.matrix(refdata[geneset,])

colnames(refdata) <- "ref"

alldata <- cbind(refdata,TCavermat[geneset,])
normalized <- scale(alldata,center = T,scale = T)
cormat <- cor(normalized,method = "spearman")

pheatmap(cormat,cluster_rows = T,breaks = unique(c(seq(0,1, length=400))), color = colorRampPalette(c("#1E90FF", "white", "#ff2121"))(400),border_color = NA,cluster_cols = T,main = "corheatmapTC")


pheatmap(TCavermat[PCgenes,], breaks = unique(c(seq(0,4, length=400))),
                          cluster_cols=F, scale = "none",cluster_rows = T, show_rownames = F, 
                          color = colorRampPalette(c("white", "#ff2121"))(400), 
                          angle_col = 45, border_color = NA, main = "ASP_Trhighexpr Heatmap")
```

```{r}
aver_expr_mat <- get_averexpr_mat_cluster(TC)
colnames(aver_expr_mat) <- levels(TC@active.ident)

#从asp vs tr45数据集
all_genes <- read.csv("./DEGs_from_smartSeq/Pro vs Asp.csv", header = T,row.names = 1)
all_genes[is.na(all_genes)] <-  0
ASPgenes <- all_genes[all_genes$baseMean>100 & all_genes$padj<0.01 & all_genes$log2FoldChange<(-0.5),] #ASP,log2FoldChange<(-0.5)    Tr45,log2FoldChange>0.5 baseMean>1000
ASPgenes <- rownames(ASPgenes)
ASPgenes <- intersect(ASPgenes,rownames(TCsubMarkers))

Tr45highexpr <- pheatmap(aver_expr_mat[ASPgenes,], cluster_cols=F, scale = "row",cluster_rows = T, show_rownames = F, color = colorRampPalette(c("white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "ASP_Tr45highexpr Heatmap")

```


# 比较SMARTseq和scRNA-seq marker基因的重合个数
## ASP
```{r}
topn = 4000
refdata0 <- read.csv("./data_tables/DEGs_from_smartSeq/ASP_normalized_counts/asp_normalized_counts.csv", header = T,row.names = 1)
refdata0 <- as.matrix(rowMeans(refdata0)) #按行取平均
refdata0 <- as.matrix(as.matrix(refdata0[order(refdata0,decreasing = T),])[1:topn,]) 
geneset0 <- intersect(VariableFeatures(ctrl_filtered), rownames(refdata0)) 

refdata1 <- read.csv("./data_tables/DEGs_from_smartSeq/ASP_normalized_counts/notchdataset_normalized_counts.csv",row.names = 1)
refdata1 <- as.matrix(rowMeans(refdata1)) #按行取平均
refdata1 <- as.matrix(as.matrix(refdata1[order(refdata1,decreasing = T),])[1:topn,]) 
geneset1 <- intersect(VariableFeatures(ctrl_filtered), rownames(refdata1)) 

refdata2 <- read.csv("./data_tables/DEGs_from_smartSeq/ASP_normalized_counts/ASP_TPM.csv",row.names = 1)
refdata2 <- as.matrix(rowMeans(refdata2)) 
refdata2 <- as.matrix(as.matrix(refdata2[order(refdata2,decreasing = T),])[1:topn,]) 
geneset2 <- intersect(VariableFeatures(ctrl_filtered), rownames(refdata2)) 

cluster_info <- Idents(ctrl_filtered) %>% levels()

# ctrl_markers <- FindAllMarkers(ctrl, logfc.threshold = log(1.2), min.diff.pct = 0.1)
# ctrl_markers <- ctrl_markers %>% group_by(cluster) %>% slice_max(n = 400, order_by = avg_logFC)


func <- function(i, feature){
  length(intersect(ctrl_markers[ctrl_markers$cluster == i,]$gene, feature))
}
inter_1 <- lapply(cluster_info, func, geneset0)
inter_2 <- lapply(cluster_info, func, geneset1)
inter_3 <- lapply(cluster_info, func, geneset2)


plot_ly(
  x = cluster_info, y = as.numeric(inter_1), name = 'ASP_normalized', type = 'bar') %>%
  add_trace(y = as.numeric(inter_2), name = 'notchctrl') %>% 
  add_trace(y = as.numeric(inter_3), name = 'ASP_TPM')%>% layout(title = paste0("ASPtop", as.character(topn)))


```

## PC
```{r}
refdata2 <- read.csv("./data_tables/DEGs_from_smartSeq/nomalized_expr_tr4Tr5Tr5_2.csv",row.names = 1)
cluster_info <- Idents(ctrl_filtered) %>% levels()
# refdata2 <- read.csv("~/single cell-atlas of fly trachea/ASP_bulkRNAseq/Tr45_normalized_counts.csv",row.names = 1)
refdata2 <- as.matrix(rowMeans(refdata2)) 
refdata2 <- as.matrix(as.matrix(refdata2[order(refdata2,decreasing = T),])[1:800,]) 
geneset2 <- intersect(VariableFeatures(ctrl_filtered), rownames(refdata2)) 
inter <- lapply(cluster_info, func, geneset2)

plot_ly(
  x = cluster_info,
  y = as.numeric(inter),
  name = "asptrdataset", type = "bar") %>% layout(title = "PCtop800")

pheatmap(aver_expr_mat[geneset2,], cluster_cols=F, scale = "none",cluster_rows = T, show_rownames = F, color = colorRampPalette(c( "white", "#ff2121"))(400), angle_col = 45, border_color = NA, main = "PCtop800 Heatmap")

```
```{r}
library(UpSetR)
func22 <- function(i, feature){
  intersect(ctrl_markers[ctrl_markers$cluster == i,]$gene, feature)
}

data =  lapply(cluster_info, func22, geneset2)
names(data) <- cluster_info
upset(fromList(data),nsets = length(data), sets = cluster_info)
```